# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
# Common imports
import numpy as np
import os
# to make this notebook's output stable across runs
np.random.seed(42)
# To plot pretty figures (IPython magic: render matplotlib output inline in the notebook)
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"
def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG under images/<chapter>/.

    Parameters
    ----------
    fig_id : str
        Base filename (without extension) for the saved figure.
    tight_layout : bool
        If True, call plt.tight_layout() before saving.
    """
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    # Create the target directory if it is missing, so savefig does not
    # raise FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
# Ignore useless warnings (see SciPy issue #5998);
# the message pattern is a regex matched against the start of the warning text.
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")
# Load the pretrained VGG16 model into memory and print the feature-map
# shape produced by each convolutional layer.
from tensorflow.keras.applications.vgg16 import VGG16
from matplotlib import pyplot
# load the model (downloads ImageNet weights on first use)
model = VGG16()
# summarize feature map shapes, keeping only the conv layers
# (idiom fix: enumerate instead of indexing with range(len(...)))
for i, layer in enumerate(model.layers):
    # skip anything that is not a convolutional layer
    if 'conv' not in layer.name:
        continue
    print(i, layer.name, layer.output.shape)
# Visualize the feature maps output from each block of the VGG16 model.
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Model
from matplotlib import pyplot
from numpy import expand_dims

# Load VGG16, then redefine it so its outputs are the activations of the
# last conv layer of each of the five blocks instead of the predictions.
model = VGG16()
ixs = [2, 5, 9, 13, 17]
outputs = [model.layers[i].output for i in ixs]
model = Model(inputs=model.inputs, outputs=outputs)

# Load the image at the 224x224 size VGG expects, convert it to an array,
# add a batch axis so it represents a single 'sample', then apply the
# VGG-specific pixel preprocessing.
img = load_img('ldd_image.jpg', target_size=(224, 224))
img = img_to_array(img)
img = expand_dims(img, axis=0)
img = preprocess_input(img)

# One feature-map tensor per requested layer.
feature_maps = model.predict(img)

# Plot the first 64 channels of each block's output as an 8x8 grid.
# The counter starts at 2 to keep the original output filenames
# ("2conv_net.png" .. "6conv_net.png").
square = 8
for ctr, fmap in enumerate(feature_maps, start=2):
    print(ctr)
    fig = pyplot.figure(figsize=(18, 16), dpi=80, facecolor='w', edgecolor='k')
    for ix in range(1, square * square + 1):
        # one subplot per channel, with the axes hidden
        ax = pyplot.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # plot the filter channel in grayscale
        pyplot.imshow(fmap[0, :, :, ix - 1], cmap='gray')
    # save and show the figure for this block
    pyplot.savefig(str(ctr) + "conv_net.png", dpi=80)
    pyplot.show()
# Grad-CAM implementation adapted from Jacob Gil: https://github.com/jacobgil/keras-grad-cam
# Usage: python grad-cam.py <image_path>
from keras.applications.mobilenet_v2 import (
MobileNetV2, preprocess_input, decode_predictions)
from keras.preprocessing import image
from keras.layers.core import Lambda
from keras.models import Sequential
from tensorflow.python.framework import ops
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import sys
import cv2
from keras.models import Model
def _compute_gradients(tensor, var_list):
    """Gradients of `tensor` w.r.t. each variable, with None replaced by zeros."""
    grads = tf.gradients(tensor, var_list)
    result = []
    for var, grad in zip(var_list, grads):
        # tf.gradients returns None for variables the tensor does not depend
        # on; substitute an all-zeros tensor of the matching shape instead.
        result.append(grad if grad is not None else tf.zeros_like(var))
    return result
def target_category_loss(x, category_index, nb_classes):
    """Mask `x` elementwise so only the score of `category_index` survives."""
    one_hot_mask = K.one_hot([category_index], nb_classes)
    return tf.multiply(x, one_hot_mask)
def target_category_loss_output_shape(input_shape):
    """Output-shape hook for the category-masking Lambda: elementwise, so unchanged."""
    return input_shape
def normalize(x):
    """Scale a tensor to (approximately) unit RMS norm.

    The 1e-5 epsilon guards against division by zero for all-zero tensors.
    """
    rms = K.sqrt(K.mean(K.square(x)))
    return x / (rms + 1e-5)
def load_image(path):
    """Load an image from `path`, resize to 224x224, and preprocess for the model.

    Returns a (1, 224, 224, 3) float array ready for model.predict.
    """
    # Bug fix: the original ignored `path` and re-read sys.argv[1] directly,
    # leaving the parameter dead and making the function unusable on any
    # other file.
    img = image.load_img(path, target_size=(224, 224))
    x = image.img_to_array(img)
    # add a batch axis (the model expects a batch of samples)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
def register_gradient():
    """Register the 'GuidedBackProp' gradient, at most once per process.

    Guided backprop propagates a gradient through a ReLU only where both the
    incoming gradient and the forward activation are positive.
    """
    # NOTE(review): peeks at TF's private gradient registry to stay
    # idempotent across repeated calls.
    if "GuidedBackProp" not in ops._gradient_registry._registry:
        @ops.RegisterGradient("GuidedBackProp")
        def _GuidedBackProp(op, grad):
            dtype = op.inputs[0].dtype
            positive_grad = tf.cast(grad > 0., dtype)
            positive_input = tf.cast(op.inputs[0] > 0., dtype)
            return grad * positive_grad * positive_input
def compile_saliency_function(model, activation_layer='Conv_1'):
    """Build a backend function mapping an input image to a saliency map.

    The saliency is the gradient, w.r.t. the input image, of the sum of the
    channel-wise maximum activation of `activation_layer`.
    """
    input_img = model.input
    # map layer names to layers, skipping the input layer itself
    layers_by_name = {layer.name: layer for layer in model.layers[1:]}
    layer_output = layers_by_name[activation_layer].output
    # strongest activation across channels at every spatial position
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    return K.function([input_img, K.learning_phase()], [saliency])
def modify_backprop(model, name):
    """Rebuild MobileNetV2 with the ReLU gradient overridden by gradient `name`.

    Side effect: mutates the activations of the passed-in model's layers, then
    instantiates a fresh model inside the override map so the new graph picks
    up the replacement gradient.
    """
    g = tf.get_default_graph()
    with g.gradient_override_map({'Relu': name}):
        # every layer that carries an activation attribute
        activation_layers = [layer for layer in model.layers[1:]
                             if hasattr(layer, 'activation')]
        # swap keras relu for tf.nn.relu so the override map applies
        for layer in activation_layers:
            if layer.activation == keras.activations.relu:
                layer.activation = tf.nn.relu
        # re-instantiate a new model within the override scope
        new_model = MobileNetV2(weights='imagenet')
    return new_model
def deprocess_image(x):
    """Convert a gradient/saliency tensor into a displayable uint8 RGB array.

    Same normalization as in:
    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py

    Note: mutates `x` in place before the final copy; pass a scratch array.
    """
    # drop any leading singleton (batch) axes
    if np.ndim(x) > 3:
        x = np.squeeze(x)
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB array
    x *= 255
    # Bug fix: image_data_format() returns 'channels_first'/'channels_last',
    # never 'th' (that was the old image_dim_ordering() value), so the
    # transpose branch below could never run. Compare the real value.
    if keras.backend.image_data_format() == 'channels_first':
        # move channels from the first axis to the last for display
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x
def grad_cam(input_model, image, category_index, layer_name):
    """Compute a Grad-CAM visualization for `category_index` at `layer_name`.

    Parameters
    ----------
    input_model : keras Model with a 1000-class ImageNet head.
    image : preprocessed (1, 224, 224, 3) array.
    category_index : int class index to explain.
    layer_name : name of the conv layer to build the heatmap from.

    Returns
    -------
    (cam, heatmap): the uint8 colorized overlay and the raw [0, 1] heatmap.
    """
    nb_classes = 1000
    # append a layer that masks out every class score except the target
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    x = Lambda(target_layer, output_shape=target_category_loss_output_shape)(input_model.output)
    model = Model(inputs=input_model.input, outputs=x)
    model.summary()
    loss = K.sum(model.output)
    # Bug fix: the original compared layer names with `is` (object identity),
    # which only works by accident of string interning; use `==` (equality).
    conv_output = [l for l in model.layers if l.name == layer_name][0].output
    grads = normalize(_compute_gradients(loss, [conv_output])[0])
    gradient_function = K.function([model.input], [conv_output, grads])
    output, grads_val = gradient_function([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]
    # channel weights: gradients global-average-pooled over spatial dims
    weights = np.mean(grads_val, axis=(0, 1))
    # weighted sum of the feature-map channels
    cam = np.ones(output.shape[0:2], dtype=np.float32)
    for i, w in enumerate(weights):
        cam += w * output[:, :, i]
    cam = cv2.resize(cam, (224, 224))
    cam = np.maximum(cam, 0)
    heatmap = cam / np.max(cam)
    # Return to BGR [0..255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)
    # overlay the JET-colorized heatmap on the image
    cam = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap
# --- Grad-CAM driver: classify the image given on the command line, then
# --- write gradcam.jpg and guided_gradcam.jpg next to the script.
preprocessed_input = load_image(sys.argv[1])
model = MobileNetV2(weights='imagenet')
predictions = model.predict(preprocessed_input)
# top_1 is a (class_id, class_name, probability) tuple
top_1 = decode_predictions(predictions)[0][0]
print('Predicted class:')
print('%s (%s) with probability %.2f' % (top_1[1], top_1[0], top_1[2]))
predicted_class = np.argmax(predictions)
# Grad-CAM against the 'Conv_1' layer of MobileNetV2
cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "Conv_1")
cv2.imwrite("gradcam.jpg", cam)
# guided backprop saliency, multiplied by the heatmap for guided Grad-CAM
register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
# second argument 0 = inference mode (K.learning_phase())
saliency = saliency_fn([preprocessed_input, 0])
gradcam = saliency[0] * heatmap[..., np.newaxis]
cv2.imwrite("guided_gradcam.jpg", deprocess_image(gradcam))
# MobileNet v2 explanations with ELI5.
# ELI5 is a Python package that helps debug machine-learning classifiers and explain their predictions.
# Environment setup: cd ml3 && activate env3
from PIL import Image
from IPython.display import display
import numpy as np
# you may want to keep logging enabled when doing your own work
import logging
import tensorflow as tf
#tf.get_logger().setLevel(logging.ERROR)
import warnings
warnings.simplefilter("ignore")
import keras
from keras.applications import mobilenet_v2
import eli5
# instantiate MobileNetV2 with its ImageNet classifier head (1000 classes)
model = mobilenet_v2.MobileNetV2(include_top=True, weights='imagenet', classes=1000)
# check the input format: (batch, height, width, channels)
print(model.input_shape)
dims = model.input_shape[1:3] # -> (height, width)
print(dims)
image_uri = 'mis_flat_true_office.jpg'
#image_uri = 'mis_house_true_office.jpg'
# check the image with Pillow
im = Image.open(image_uri)
print(type(im))
display(im)
# we could resize the image manually
# but instead let's use a utility function from `keras.preprocessing`
# we pass the required dimensions as a (height, width) tuple
im = keras.preprocessing.image.load_img(image_uri, target_size=dims) # -> PIL image
print(im)
display(im)
# convert to an array with a `keras.preprocessing` routine as well;
# the result ('doc') is almost ready to be fed to the model
doc = keras.preprocessing.image.img_to_array(im) # -> numpy array
print(type(doc), doc.shape)
# add a batch axis in the first position
# (the model expects a batch of samples; here a batch of one)
doc = np.expand_dims(doc, axis=0)
print(type(doc), doc.shape)
# `keras.applications` models come with their own input preprocessing function
# for best results, apply that as well
# mobilenetv2-specific preprocessing
# NOTE(review): the return value is discarded — this relies on the op being
# in-place; verify for the installed keras version.
mobilenet_v2.preprocess_input(doc)
print(type(doc), doc.shape)
# take back the first image from our 'batch' for display
image = keras.preprocessing.image.array_to_img(doc[0])
print(image)
display(image)
# make a prediction about our sample image
predictions = model.predict(doc)
print(type(predictions), predictions.shape)
# check the top 5 indices
# `keras.applications` contains a function for that
top = mobilenet_v2.decode_predictions(predictions)
top_indices = np.argsort(predictions)[0, ::-1][:5]
print(top)
print(top_indices)
# we need to pass the network
# the input as a numpy array
eli5.show_prediction(model, doc)
# pass `image` so the heatmap is overlaid on the displayable PIL image
eli5.show_prediction(model, doc, image=image)
#make the model classify other objects and check where the classifier looks to find those objects
#cat_idx = 282 # ImageNet ID for "tiger_cat" class, because we have a cat in the picture
#eli5.show_prediction(model, doc, targets=[cat_idx]) # pass the class id
#window_idx = 904 # 'window screen'
#turtle_idx = 35 # 'mud turtle', some nonsense
#display(eli5.show_prediction(model, doc, targets=[window_idx]))
#display(eli5.show_prediction(model, doc, targets=[turtle_idx]))
# Under the hood Grad-CAM takes a hidden layer inside the network and
# differentiates it with respect to the output scores. We have the ability
# to choose which hidden layer we do our computations on.
# we could use model.summary() here, but the model has over 100 layers.
# we will only look at the first few and last few layers
head = model.layers[:5]
tail = model.layers[-8:]
def pretty_print_layers(layers):
    """Print one formatted row (name, type, output shape, param count) per layer."""
    for layer in layers:
        row = [layer.name, type(layer).__name__, layer.output_shape, layer.count_params()]
        pretty_print(row)
def pretty_print(lst):
    """Print the items of `lst` on one line, joined by a comma and a tab."""
    print(',\t'.join(str(item) for item in lst))
# print a table header and a separator line
pretty_print(['name', 'type', 'output shape', 'param. no'])
print('-'*100)
# the input tensor is not a layer, so print it as a synthetic first row
pretty_print([model.input.name, type(model.input), model.input_shape, 0])
pretty_print_layers(head)
print()
print('...')
print()
pretty_print_layers(tail)
# Run Grad-CAM against convolutional layers that are 'far apart' in the network.
# The `layer` parameter accepts a layer instance, index, name, or None
# (auto-select); it is the layer Grad-CAM builds its heatmap from.
for layer_name in ['block_2_expand', 'block_9_expand', 'Conv_1']:
    print(layer_name)
    display(eli5.show_prediction(model, doc, layer=layer_name))
# get the raw explanation object instead of the rendered image
expl = eli5.explain_prediction(model, doc)
print(expl)
# we can access the various attributes of a target being explained
print((expl.targets[0].target, expl.targets[0].score, expl.targets[0].proba))
image = expl.image
heatmap = expl.targets[0].heatmap
display(image) # the .image attribute is a PIL image
print(heatmap) # the .heatmap attribute is a numpy array
# visualizing the heatmap as a small grayscale image:
heatmap_im = eli5.formatters.image.heatmap_to_image(heatmap)
display(heatmap_im)
# resize the heatmap up to the original image's size
heatmap_im = eli5.formatters.image.expand_heatmap(heatmap, image, resampling_filter=Image.BOX)
display(heatmap_im)
# overlay the heatmap over the original image
I = eli5.format_as_image(expl)
display(I)
# customize colorisation and opacity of the overlay
import matplotlib.cm
I = eli5.format_as_image(expl, alpha_limit=1.0, colormap=matplotlib.cm.cividis)
display(I)
# Swap the softmax of the output layer for a linear (no-op) activation so
# the explanation is computed on raw logits instead of probabilities.
# first check the explanation *with* softmax
print('with softmax')
display(eli5.show_prediction(model, doc))
# remove softmax
l = model.get_layer(index=-1) # get the last (output) layer
l.activation = keras.activations.linear # swap activation
# save and load back the model as a trick to rebuild the graph with the new activation
model.save('tmp_model_save_rmsoftmax') # note that this creates a file of the model
model = keras.models.load_model('tmp_model_save_rmsoftmax')
print('without softmax')
display(eli5.show_prediction(model, doc))
# Comparison against NASNetMobile.
from keras.applications import nasnet
model2 = nasnet.NASNetMobile(include_top=True, weights='imagenet', classes=1000)
# we reload the image array to apply nasnet-specific preprocessing
doc2 = keras.preprocessing.image.img_to_array(im)
doc2 = np.expand_dims(doc2, axis=0)
nasnet.preprocess_input(doc2)
print(model.name)
# note that this model is without softmax (it was swapped to linear above)
display(eli5.show_prediction(model, doc))
print(model2.name)
display(eli5.show_prediction(model2, doc2))
# Comparison against VGG19.
from keras.applications import vgg19
model3 = vgg19.VGG19(include_top=True, weights='imagenet', classes=1000)
# reload the image array to apply vgg-specific preprocessing
# (the original comment wrongly said "nasnet-specific")
doc3 = keras.preprocessing.image.img_to_array(im)
doc3 = np.expand_dims(doc3, axis=0)
vgg19.preprocess_input(doc3)
print(model.name)
# note that this model is without softmax
display(eli5.show_prediction(model, doc))
print(model3.name)
# Bug fix: the original displayed model2 (NASNet) fed with the
# VGG-preprocessed doc3; show the VGG19 model's own prediction instead.
display(eli5.show_prediction(model3, doc3))